# delimit;
***From here on every command (and every star comment) ends at a semicolon;
clear;
set more off;
***NOTE(review): set mem is only needed by older Stata releases - presumably ignored by newer ones, confirm;
set mem 20m;
log using table8_14growing.log, replace;

***Death data: keep age codes up to 23, drop sex code 3, keep years through 2000,
***then sort on the merge keys (plus urban) and save as scratch file one;
use data_urban_alt;
keep if age <=23 & sex!=3;
keep if year<=2000;
sort year dep_ocu age sex urban;
save one, replace;

***Second input (pop_sex): keep years 1992 through 2000, sort on the merge keys, save as scratch file pop.
***Presumably this file supplies the population variable used later - confirm against the data;
use pop_sex;
keep if year<=2000 & year>=1992;
sort year dep_ocu age sex;
save pop, replace;

***Old-syntax match merge of pop onto the death data by year, dep_ocu, age and sex.
***This creates _merge, which is dropped further down;
use one;
merge year dep_ocu age sex using pop;

***Set missing death counts to zero in every observation whose population is not zero.
***The test is written as an if qualifier on each replace so that it is applied row by row.
***Wrapping the replaces in an if command would not do that, because the if command evaluates
***its expression once, using only the first observation, and then runs or skips the whole block.
***Note that a missing population also satisfies population!=0 - TODO confirm that this is wanted;
foreach v of varlist death violent_a disease homicide accident {;
  replace `v'=0 if `v'==. & population!=0;
};

***Rescale the age code: newage = (age-7)*5;
gen newage=(age-7)*5;

***Keep sex code 1 with newage from 15 through 60 inclusive.
***NOTE(review): the earlier note here said males aged 15-59 according to the SAS file and asked
***for the age filter to be double checked - that check is still open;
keep if inrange(newage,15,60) & sex==1;

***Pool pairs of adjacent groups: 20 joins 15, 30 joins 25, 40 joins 35, 50 joins 45, 60 joins 55;
recode newage (20=15) (30=25) (40=35) (50=45) (60=55);


******************************************
***Growing-department and DMZ indicators, plus a three-way province label.
***All 14 department codes below are active in this version. The first nine listed are
***presumably the narrower 9 dept. definition and the last five the extension - TODO confirm;

gen grow94=inlist(dep_ocu,13,18,19,50,52,86,95,97,99,20,54,94,44,47);

***DMZ flags departments 50 and 18, both of which are also in the growing list;
gen DMZ=inlist(dep_ocu,50,18);

***prov_type: Non-growing when grow94 is 0, DMZ when growing and DMZ, Growing otherwise;
gen str11 prov_type=cond(grow94==0,"Non-growing",cond(DMZ==1,"DMZ","Growing"));

***_merge is left over from the earlier merge. Save the observation-level file as temp;
drop _merge;
save temp, replace;

***Build dept_cat: one row per department number carrying grow94, DMZ and prov_type.
***grow94 and DMZ depend on dep_ocu only, so the default collapse statistic (the mean)
***returns the same 0 or 1 value for each department;
keep dep_ocu DMZ grow94;
collapse grow94 DMZ, by(dep_ocu);
gen prov_type="Non-growing" if grow94==0;
replace prov_type="Growing" if grow94==1;
***grow94 is spelled out in full here. The abbreviation grow only resolves while variable
***abbreviation is switched on and no other variable name starts with grow;
replace prov_type="DMZ" if grow94==1 & DMZ==1;

***Sorted on dep_ocu so the file can be used in a match merge on that key;
sort dep_ocu;
save dept_cat, replace;
**************************************************;


********************************************************
****collapsing to sum across groups and categories******;
***temp1: totals of deaths by cause and of population for each urban, year, department and newage cell.
***Sorted on dep_ocu because the next merge onto temp1 uses dep_ocu as its key;
use temp;
collapse (sum) death violent_a accident disease homicide population, by(urban year dep_ocu newage);
sort dep_ocu ;
save temp1, replace;


***temp2: sum of violent over non-missing rows for each year, department and newage cell (urban is not a by-variable here).
***NOTE(review): violent is either a separate variable in the data or an abbreviation of violent_a -
***this cannot be told from this file, confirm before changing it;
use temp;
collapse (sum) violent if violent!=., by(year dep_ocu newage);
sort year dep_ocu newage ;
save temp2, replace;

***Attach the department categories (grow94, DMZ, prov_type) to temp1, then attach temp2 by year, department and newage.
***NOTE(review): an old-syntax merge without the update option presumably keeps the master values of
***any variable present in both files - verify what temp2 actually contributes;
use temp1;
merge dep_ocu using dept_cat;
drop _merge;
sort year dep_ocu newage ;
merge year dep_ocu newage using temp2;
drop _merge; 
********************************************************;

********************************************************;
***Cause-specific death rates per 100000 population;
gen arate=100000*(accident/population);
gen vrate=100000*(violent_a/population);
gen drate=100000*(disease/population);
gen hrate=100000*(homicide/population);

***Log outcomes, defined only where the numerator is positive. lnvratio is also restricted to
***cells with fewer violent deaths than total deaths so that its denominator is positive;
gen lnvrate=ln(violent_a/population) if violent_a>0;
gen lnvratio=ln(violent_a/(death-violent_a)) if violent_a>0 & violent_a<death;
gen lndrate=ln(disease/population) if disease>0;

***Period indicators: post is 1 from 1995 on, d9597 covers 1995-1997, d9800 covers 1998-2000;
gen post=(year>=1995);
gen d9597=inrange(year,1995,1997);
gen d9800=inrange(year,1998,2000);

***Negative year from the base year onward and 0 before it, for base years 1994 and 1993;
gen negyear94=cond(year>=1994,-year,0);
gen negyear93=cond(year>=1993,-year,0);

***growing is 1 only for prov_type Growing, so departments labelled DMZ get 0;
gen growing=(prov_type=="Growing");

***Linear trend counted from 1989;
gen trend=year-1989;



***Flag departments 5, 11 and 76 as bigcity and remove them from the sample.
***The bigcity variable itself stays in the dataset, equal to 0 in every remaining row;
gen bigcity=inlist(dep_ocu,5,11,76);
keep if bigcity==0;

***Cluster identifier combining department code and year: dep_ocu plus year divided by 100;
gen clusterid=dep_ocu+(year/100);

***Descriptive labels for the main outcome variables and the DMZ indicator;
label variable lnvratio "logit violent death rate";
label variable vrate "violent death rate";
label variable drate "disease death rate";
***Department name spelled Caqueta (the label previously read Cauqeta);
label variable DMZ "Meta or Caqueta";

**************************************************
****     Interaction terms and dummy sets     ****;

***Products of the growing and DMZ indicators with the negative-year variables,
***created in the order growing then DMZ for base year 93, then the same for 94;
foreach s in 93 94 {;
  gen growing_negyear`s'=growing*negyear`s';
  gen DMZ_negyear`s'=DMZ*negyear`s';
};

***separate makes one variable per distinct year, department and age group.
***One category per set is dropped (year 2000, newage 55, department 99) and the rest are turned
***into 0 and 1 indicators. The recode range relies on the separated variables sitting next to
***each other in the dataset, from year1992 through newage45;
separate year, by(year);
separate dep_ocu, by(dep_ocu);
separate newage, by(newage);
drop dep_ocu99 newage55 year2000;
recode year1992-newage45 (nonmiss=1) (missing=0);

***Expand each negative-year interaction into one 0 or 1 indicator per year.
***Within each year the growing indicator is created before the DMZ one. That order is kept
***because the regressions select these variables with a varlist range;
forvalues yr=1993/2000 {;
  gen growing_negyear93_`yr'=(growing_negyear93==-`yr');
  gen DMZ_negyear93_`yr'=(DMZ_negyear93==-`yr');
};

forvalues yr=1994/2000 {;
  gen growing_negyear94_`yr'=(growing_negyear94==-`yr');
  gen DMZ_negyear94_`yr'=(DMZ_negyear94==-`yr');
};

***Province-type specific trends: equal to trend within the named prov_type and 0 everywhere else,
***including rows where trend itself is missing;
gen growing_trend=cond(prov_type=="Growing" & trend<.,trend,0);
gen non_growing_trend=cond(prov_type=="Non-growing" & trend<.,trend,0);
gen DMZ_trend=cond(prov_type=="DMZ" & trend<.,trend,0);

***Drop observations that have no urban code;
keep if urban!=.;

***Note that the standard errors for the regressions differ slightly from those reported in the paper,
***since SAS and Stata calculate clustered standard errors differently;

***Note: paper reports unweighted results in Table 4, but weighted results are provided below as well;

**************************************************;
*****   summary statistics by urban status   *****;
**************************************************;
bysort urban: summarize;

**************************************************;
*****  lnvrate regressions - unweighted - clustered on clusterid  *****;
**************************************************;
***Regressors shared by all four models: department, year and age dummies plus the
***growing and DMZ year indicators built from negyear94;
local base "dep_ocu8-dep_ocu97 year1992-year1999 newage15-newage45 growing_negyear94_1994-DMZ_negyear94_2000";

***No trends: urban sample (urban==1) first, then rural (urban==0);
foreach u in 1 0 {;
  reg lnvrate `base' if urban==`u', cluster(clusterid);
};

***With growing and DMZ trends: same sample order;
foreach u in 1 0 {;
  reg lnvrate `base' growing_trend DMZ_trend if urban==`u', cluster(clusterid);
};

***Delete the scratch datasets written by this script, then close the log;
foreach f in one pop temp temp1 temp2 dept_cat {;
  erase `f'.dta;
};
log close;

